/*
	Cleaning script for study 1 and study 2.
	Identifying variables have been removed from the raw files.

*/


/*-------------------------------------------
	Study 1
---------------------------------------------*/

insheet using study_1_raw.csv, clear comma


// Respondent demographics: every variable below except r_age is a 0/1 indicator

	// Age is copied under the r_ (respondent) prefix
	gen r_age = age

	// Gender: code 1 -> male, code 2 -> female
	gen male   = (gender == 1)
	gen female = (gender == 2)

	// Household income brackets, built from the hhi category codes
	gen hhi_1 = inrange(hhi, 1, 4)             // <30k
	gen hhi_2 = inrange(hhi, 5, 10)            // 30-60k
	gen hhi_3 = inrange(hhi, 11, 18)           // 60-100k
	gen hhi_4 = inlist(hhi, 19, 20, 21, 22)    // 100-200k
	gen hhi_5 = inlist(hhi, 23, 24)            // 200+
	gen hhi_6 = (hhi == -3105)                 // prefer not say

	// Education, collapsed from the educ codes
	gen edu_nohs     = (educ == 1)
	gen edu_hs       = inlist(educ, 2, 3)
	gen edu_somecol  = (educ == 4)
	gen edu_2yr      = (educ == 5)
	gen edu_ba       = (educ == 6)
	gen edu_postgrad = inlist(educ, 7, 8)

	// Region: codes 1-4 map, in order, to ne / mw / s / w
	local code = 0
	foreach area in ne mw s w {
		local ++code
		gen reg_`area' = (region == `code')
	}
	

// Respondent Race
	// Each indicator is 1 when the raw dem_race text equals the category label, else 0.
	gen r_white = dem_race == "White"
	gen r_black = dem_race == "Black or African American"
	gen r_asian = dem_race == "Asian"
	gen r_hispanic = dem_race == "Hispanic or Latino"

	// "Other" = none of the four categories above.
	// The indicators are 0/1 and never missing, so the previous missing()-based
	// definition was 0 for every respondent; test the indicators directly instead.
	// NOTE(review): respondents with a blank dem_race also fall into r_other.
	gen r_other = !(r_white | r_black | r_asian | r_hispanic)

// Respondent PID
	// Keep the raw party code under the r_ prefix
	rename political_party r_pid
	label var r_pid "Respondent PID from Lucid"

	// Three party groups built from the r_pid codes; any other code
	// (or a missing value) leaves all three indicators at 0.
	gen r_dem = inlist(r_pid, 1, 2, 3, 6)
	gen r_ind = (r_pid == 4)
	gen r_gop = inlist(r_pid, 5, 8, 9, 10)

	label var r_dem "Respondent PID, Democrats (Incl. Leaners)"
	label var r_ind "Respondent PID, Pure Independents"
	label var r_gop "Respondent PID, Republicans (Incl. Leaners)"

	
// Candidate Profile Attributes

	// cattr_age may have been imported as numeric or as string.
	// Record the result of the type check, print it, and branch on it.
	capture confirm numeric variable cattr_age
	local age_rc = _rc
	display `age_rc'
	if `age_rc' == 0 {
		// already numeric: copy as is
		gen cand_age = cattr_age
	}
	else {
		// not numeric: convert the text to a number
		gen cand_age = real(cattr_age)
	}
	label var cand_age "Candidate Age"

	// Candidate race indicators from the cattr_race text
	gen cand_black = (cattr_race == "Black")
	gen cand_white = (cattr_race == "White")
	label var cand_black "Black"
	label var cand_white "White"

	// Candidate sex indicators from the cattr_sex text
	gen cand_female = (cattr_sex == "woman")
	gen cand_male = (cattr_sex == "man")
	label var cand_female "Female"
	label var cand_male "Male"
		
	
	// Policy-position flags, all initialised to 0 and switched on below
	local policy_flags cand_policy_abort1 cand_policy_abort2 cand_policy_tax1 cand_policy_tax2 ///
		cand_policy_health1 cand_policy_health2 cand_policy_eco1 cand_policy_eco2 ///
		cand_policy_aa1 cand_policy_aa2 cand_policy_aa3
	foreach flag of local policy_flags {
		gen `flag' = 0
	}

	/*
		A profile carries policy text in three slots (cattr_policya/b/c).
		A flag is set to 1 when its key phrase appears in any slot;
		strpos() returns the position of the phrase, or 0 when it is absent.
	*/
	foreach slot in a b c {
		local txt cattr_policy`slot'

		// Abortion
		replace cand_policy_abort1 = 1 if strpos(`txt', "including the third trimester") > 0
		replace cand_policy_abort2 = 1 if strpos(`txt', "first trimester") > 0

		// Taxes
		replace cand_policy_tax1 = 1 if strpos(`txt', "Increase the tax rate") > 0
		replace cand_policy_tax2 = 1 if strpos(`txt', "Maintain the current tax rate") > 0

		// Health care
		replace cand_policy_health1 = 1 if strpos(`txt', "Expand healthcare coverage") > 0
		replace cand_policy_health2 = 1 if strpos(`txt', "Maintain current government") > 0

		// Energy
		replace cand_policy_eco1 = 1 if strpos(`txt', "Expand investment in renewable") > 0
		replace cand_policy_eco2 = 1 if strpos(`txt', "Maintain current patterns") > 0

		// Affirmative action
		replace cand_policy_aa1 = 1 if strpos(`txt', "Expand affirmative action") > 0
		replace cand_policy_aa2 = 1 if strpos(`txt', "Maintain existing affirmative action") > 0
		replace cand_policy_aa3 = 1 if strpos(`txt', "Replace affirmative action") > 0
	}

	// NOTE(review): abort2 is matched on "first trimester" but labelled "up to 2nd tri." - confirm wording
	label var cand_policy_abort1 "Allow abortion, any time"
	label var cand_policy_abort2 "Allow abortion up to 2nd tri."
	label var cand_policy_tax1 "Increase tax rate on rich"
	label var cand_policy_tax2 "Maintain tax rate on rich"
	label var cand_policy_health1 "Expand health coverage"
	label var cand_policy_health2 "Maintain health coverage policies"
	label var cand_policy_eco1 "Expand investment in energy"
	label var cand_policy_eco2 "Maintain investment in energy"
	label var cand_policy_aa1 "Expand affirmative action (race)"
	label var cand_policy_aa2 "Keep affirmative action as is"
	label var cand_policy_aa3 "Replace affirmative action (class)"

	// Eyeball check: count the flags set per issue area in each row
	// (the codebook output should show no value above 1), then discard the counters
	egen temp_tax = rowtotal(cand_policy_tax*)
	egen temp_abort = rowtotal(cand_policy_abort*)
	egen temp_health = rowtotal(cand_policy_health*)
	egen temp_eco = rowtotal(cand_policy_eco*)
	egen temp_aa = rowtotal(cand_policy_aa*)
	codebook temp_*
	drop temp_*

	// 1 when any of the three affirmative-action flags is set
	gen has_racepolicy = inlist(1, cand_policy_aa1, cand_policy_aa2, cand_policy_aa3)
	label var has_racepolicy "Candidate profile has a racial policy"
	
// Outcomes on Candidate inferences
	// Give the 7-point item the same name pattern as the econ/soc items
	rename cattr_ideo7 cattr_ideo_7

	// Map the seven answer labels onto 0 (Extremely Conservative) ... 1 (Extremely Liberal).
	// Any other text leaves the outcome missing.
	foreach dim in 7 econ soc {
		gen out_ideo_`dim' = .
		replace out_ideo_`dim' = 0 if cattr_ideo_`dim' == "Extremely Conservative"
		replace out_ideo_`dim' = 0.1667 if cattr_ideo_`dim' == "Conservative"
		replace out_ideo_`dim' = 0.3334 if cattr_ideo_`dim' == "Slightly Conservative"
		replace out_ideo_`dim' = 0.5 if cattr_ideo_`dim' == "Moderate"
		replace out_ideo_`dim' = 0.6667 if cattr_ideo_`dim' == "Slightly Liberal"
		replace out_ideo_`dim' = 0.8334 if cattr_ideo_`dim' == "Liberal"
		replace out_ideo_`dim' = 1 if cattr_ideo_`dim' == "Extremely Liberal"
	}

	label var out_ideo_7 "Perceived Liberalness (0-1)"
	label var out_ideo_econ "Perceived Liberalness, Economic Issues"
	label var out_ideo_soc "Perceived Liberalness, Social Issues"
	
	
	// Issue priority: three answer labels mapped to 0 (low), 0.5 (moderate), 1 (high);
	// anything else stays missing
	foreach topic in tax job health enviro abort crim sj {
		gen out_priority_`topic' = .
		replace out_priority_`topic' = 1 if cattr_priority_`topic' == "High priority"
		replace out_priority_`topic' = 0.5 if cattr_priority_`topic' == "Moderate priority"
		replace out_priority_`topic' = 0 if cattr_priority_`topic' == "Low priority"
	}

	// Fairness toward each group: seven answer labels mapped to 0 (Very Unfair) ... 1 (Very Fair);
	// anything else stays missing
	foreach grp in white black asian hispanic gop dem men women {
		gen out_fair_`grp' = .
		replace out_fair_`grp' = 1 if cattr_fair_`grp' == "Very Fair"
		replace out_fair_`grp' = 0.8334 if cattr_fair_`grp' == "Fair"
		replace out_fair_`grp' = 0.6667 if cattr_fair_`grp' == "Somewhat Fair"
		replace out_fair_`grp' = 0.5 if cattr_fair_`grp' == "Neutral"
		replace out_fair_`grp' = 0.3334 if cattr_fair_`grp' == "Somewhat Unfair"
		replace out_fair_`grp' = 0.1667 if cattr_fair_`grp' == "Unfair"
		replace out_fair_`grp' = 0 if cattr_fair_`grp' == "Very Unfair"
	}
	
// Inference on policy position, rescaled to 0-1 (1 = most liberal answer, 0 = most conservative)
	// NOTE(review): "Remove" is coded as the most liberal TANF answer and "Reduce" as the
	// most conservative - confirm against the survey wording.
	gen out_policy_tanf = 1 if cattr_policy_tanf == "Remove"
	replace out_policy_tanf = 0.5 if cattr_policy_tanf == "Keep"
	replace out_policy_tanf = 0 if cattr_policy_tanf == "Reduce"
	label var out_policy_tanf "Inferred Policy Liberalness, TANF"

	// Minimum wage answers are identified by the dollar amount in the text.
	// A "$" followed by digits inside a double-quoted string is expanded as a global
	// macro (normally empty), which would turn "$15.00" and "$10.00" both into ".00"
	// and make the two answers indistinguishable. Build the dollar sign with char(36)
	// so the literal amount is what gets searched for.
	gen out_policy_minwage = 1 if strpos(cattr_policy_minwage, char(36) + "15.00") != 0
	replace out_policy_minwage = 0.5 if strpos(cattr_policy_minwage, char(36) + "10.00") != 0
	replace out_policy_minwage = 0 if strpos(cattr_policy_minwage, char(36) + "7.25") != 0
	label var out_policy_minwage "Inferred Policy Liberalness, Min. Wage"

	// Reparations: cash = 1, preferential treatment = 0.5, no benefits = 0
	gen out_policy_repar = 1 if cattr_policy_repar == "Cash"
	replace out_policy_repar = 0.5 if cattr_policy_repar == "Preferential treatment"
	replace out_policy_repar = 0 if cattr_policy_repar == "No benefits"
	label var out_policy_repar "Inferred Policy Liberalness, Reparations"

// Racial Resentment
	// Convert the four agree/disagree items (sr1-sr4) to 0-1; unmatched text stays missing
	forvalues x = 1/4 {
		gen rr_`x' = .
		replace rr_`x' = 0 if sr`x' == "Disagree strongly"
		replace rr_`x' = 0.25 if sr`x' == "Disagree somewhat"
		replace rr_`x' = 0.5 if sr`x' == "Neither agree nor disagree"
		replace rr_`x' = 0.75 if sr`x' == "Agree somewhat"
		replace rr_`x' = 1 if sr`x' == "Agree strongly"
	}

	// Agreement = racially resentful for sr1, sr4 | Disagreement = racially resentful for sr2, sr3.
	// Reverse items 2 and 3 so that high values always mean racially resentful.
	// (1 - x maps 0<->1 and 0.25<->0.75, keeps 0.5, and leaves missing as missing.)
	foreach x in 2 3 {
		replace rr_`x' = 1 - rr_`x'
	}

	// Scale = mean of the non-missing items; missing only when all four are missing
	egen resent_scale = rowmean(rr_*)

	// Binary split at the mean of the respondent's own party group.
	// Respondents in none of the three groups keep a missing resent_binary.
	gen resent_binary = .
	foreach p in dem gop ind {
		qui summ resent_scale if r_`p' == 1
		loc rr = r(mean)

		// Missing values compare as larger than any number, so without the
		// !missing() guard a respondent with no scale score would be coded 1.
		replace resent_binary = resent_scale > `rr' if r_`p' == 1 & !missing(resent_scale)

		// Check: distribution above the cut, below the cut, and overall for this party
		summ resent_scale if r_`p'==1 & resent_binary == 1, d
		summ resent_scale if r_`p'==1 & resent_binary == 0, d
		summ resent_scale if r_`p'==1, d
	}
	
/* 
	Overt Racism
	overt_lazy, Hardworking (1) - Lazy (7) 
	overt_unintel, Intelligent (1) - Unintelligent (7)
	---- peaceful and trust need to be inverted
	overt_peaceful, Violent (1) - Peaceful (7)
	overt_trust, Untrustworthy (1) - Trustworthy (7)
*/
	// Convert every overt_<trait>_<1..4> rating from text to a number, keeping its name
	foreach y in lazy unintel peaceful trust {
		forvalues x = 1/4 {
			cap drop temp
			gen temp = real(overt_`y'_`x')
			drop overt_`y'_`x'
			rename temp overt_`y'_`x'
		}
	}

	// Difference scores between item _1 and item _2 of each trait.
	// Per the original notes, lazy/unintel are Black - White (>0: Black rated more
	// negatively); peaceful/trust run the other way on the response scale, so the
	// subtraction is flipped (>0: white rated more positively).
	gen overt_diff_lazy = overt_lazy_2 - overt_lazy_1
	gen overt_diff_unintel = overt_unintel_2 - overt_unintel_1
	gen overt_diff_peace = overt_peaceful_1 - overt_peaceful_2
	gen overt_diff_trust = overt_trust_1 - overt_trust_2

	// Scale = mean of the non-missing difference scores; missing only when all are missing
	egen overt_scale = rowmean(overt_diff*)

	// Binary split at the mean of the respondent's own party group.
	// Respondents in none of the three groups keep a missing overt_binary.
	gen overt_binary = .
	foreach p in dem gop ind {
		qui summ overt_scale if r_`p' == 1
		loc rr = r(mean)

		// Missing values compare as larger than any number, so without the
		// !missing() guard a respondent with no scale score would be coded 1.
		replace overt_binary = overt_scale > `rr' if r_`p' == 1 & !missing(overt_scale)

		// Check: distribution above the cut, below the cut, and overall for this party
		summ overt_scale if r_`p'==1 & overt_binary == 1, d
		summ overt_scale if r_`p'==1 & overt_binary == 0, d
		summ overt_scale if r_`p'==1, d
	}

	
	// Keep the cleaned variables plus the raw cattr_*/dem_* inputs, then write the study 1 file.
	// has_racepolicy is spelled out in full: the abbreviation "has_race" only resolves
	// while variable abbreviation is enabled (it breaks under `set varabbrev off').
	keep r_* dem_* cand_* has_racepolicy out_* rr_* resent_* overt_diff* overt_scale overt_binary cattr_* ///
		hhi_* male female edu_* reg_*
	
	save data_study_1.dta, replace

	
/*-------------------------------------------
	Study 2
---------------------------------------------*/


	insheet using study_2_raw.csv, comma name clear

	// Drop people who failed attention check
	// NOTE(review): a row is kept when EITHER condition holds - confirm that passing
	// one of the two checks is meant to be sufficient.
		keep if acq_pass == "1" | acq_identity == "Because he left his ID"
		
	// Qualtrics demographics
		// Age = 2021 minus birth year (dem_birth is stored as text)
		gen r_age = 2021 - real(dem_birth)
		
		// Respondent Race
		gen r_white = dem_race == "White"
		gen r_black = dem_race == "Black or African American"
		gen r_asian = dem_race == "Asian"
		gen r_hispanic = dem_race == "Hispanic or Latino"
		// "Other" = none of the four categories above. The indicators are 0/1 and
		// never missing, so the previous missing()-based definition was always 0.
		// NOTE(review): respondents with a blank dem_race also fall into r_other.
		gen r_other = !(r_white | r_black | r_asian | r_hispanic)
		
		// Party
		gen r_dem = dem_pid == "Democrat"
		gen r_gop = dem_pid == "Republican"
		gen r_ind = dem_pid == "Independent"
		
		label var r_dem "Respondent PID, Democrats (Incl. Leaners)"
		label var r_ind "Respondent PID, Pure Independents"
		label var r_gop "Respondent PID, Republicans (Incl. Leaners)"
		label var dem_pid "Respondent PID from Qualtrics"

		// Row-number respondent id. The later reshape uses i(r_*) as its key, and
		// age/race/party alone need not be unique across respondents; an r_-prefixed
		// id is picked up by that wildcard and guarantees a unique key.
		// Skipped when the raw file already provides a variable named r_id.
		capture confirm variable r_id, exact
		if _rc != 0 gen long r_id = _n

	keep r_* cattr* cand* 

	// Renumber the cattr_fair_* variables so that every one carries a numeric suffix:
	//   - a name whose last character is not a digit gets suffix 1
	//   - a name ending in _k gets suffix k+1
	// Pass 1 writes the new suffix after a double underscore; pass 2 collapses "__" to "_".
	// (Presumably the two-step rename avoids colliding with a not-yet-renamed variable,
	// e.g. moving x_1 to x_2 while the old x_2 still exists.)
	foreach var of varlist cattr_fair_* {
		di "`var'"
		// n = last character of the name as a number (missing when it is not a digit)
		loc n = real(substr("`var'", -1, 1))
		// No numeric suffix: tag as number 1
		if `n' == . rename `var' `var'__1
		else {
			// Numeric suffix: remove "_n" from the name and append the next number
			loc n2 = `n'+1
			loc varname = subinstr("`var'", "_`n'", "", .)
			rename `var' `varname'__`n2'
		}
	}
	// Pass 2: turn the temporary double underscore into a single one
	foreach var of varlist cattr_fair_* {
		loc x = subinstr("`var'", "__", "_", .)
		rename `var' `x'
	}

	// I probably could have done a reshape first, but it is what it is.
	forvalues n = 1/5 {

		// ---- Outcomes for candidate `n' (unmatched answer text stays missing) ----

		// Perceived ideology: 0 (Extremely Conservative) ... 1 (Extremely Liberal)
		gen out_ideo_`n' = .
		replace out_ideo_`n' = 0 if cand`n'_ideo == "Extremely Conservative"
		replace out_ideo_`n' = 0.1667 if cand`n'_ideo == "Conservative"
		replace out_ideo_`n' = 0.3334 if cand`n'_ideo == "Slightly Conservative"
		replace out_ideo_`n' = 0.5 if cand`n'_ideo == "Moderate/Middle of the road"
		replace out_ideo_`n' = 0.6667 if cand`n'_ideo == "Slightly Liberal"
		replace out_ideo_`n' = 0.8334 if cand`n'_ideo == "Liberal"
		replace out_ideo_`n' = 1 if cand`n'_ideo == "Extremely Liberal"

		// Vote likelihood: 0 (Very unlikely) ... 1 (Very likely)
		gen out_vote_`n' = .
		replace out_vote_`n' = 0 if cand`n'_vote == "Very unlikely"
		replace out_vote_`n' = 0.25 if cand`n'_vote == "Somewhat unlikely"
		replace out_vote_`n' = 0.5 if cand`n'_vote == "Equally likely or unlikely"
		replace out_vote_`n' = 0.75 if cand`n'_vote == "Somewhat likely"
		replace out_vote_`n' = 1 if cand`n'_vote == "Very likely"

		// Group items: five answer labels mapped to 0 ("None at all") ... 1 ("A greatdeal").
		// The labels are matched exactly as written here, including the run-together words.
		foreach grp in whites blacks asians hispanics gop dem men women {
			gen out_fair_`grp'_`n' = .
			replace out_fair_`grp'_`n' = 1 if cattr_fair_`grp'_`n' == "A greatdeal"
			replace out_fair_`grp'_`n' = 0.75 if cattr_fair_`grp'_`n' == "A moderateamount"
			replace out_fair_`grp'_`n' = 0.5 if cattr_fair_`grp'_`n' == "Somewhat"
			replace out_fair_`grp'_`n' = 0.25 if cattr_fair_`grp'_`n' == "A littlebit"
			replace out_fair_`grp'_`n' = 0 if cattr_fair_`grp'_`n' == "None at all"
		}

		// ---- Candidate profile attributes ----
		// Move the candidate number to the end of the name:
		// cattr`n'_<attr> becomes cattr_<attr>_`n'
		foreach old of varlist cattr`n'_* {
			local base = subinstr("`old'", "`n'_", "_", .)
			rename `old' `base'_`n'
		}
	}
	
	// The raw fairness answers and the raw cand* outcome columns are no longer needed
	drop cattr_fair* cand*

	// Stubs to stack: each has one column per candidate number
	local stubs out_ideo_ out_vote_ out_fair_whites_ out_fair_blacks_ ///
		out_fair_asians_ out_fair_hispanics_ out_fair_gop_ out_fair_dem_ ///
		out_fair_men_ out_fair_women_ cattr_age_ cattr_sex_ cattr_race_ ///
		cattr_exp_ cattr_nrp1_ cattr_nrp2_ cattr_rp_ cattr_policya_ ///
		cattr_policyb_ cattr_policyc_ cattr_biden_ cattr_white_ ///
		cattr_black_ cattr_hispa_ cattr_asian_ cattr_other_

	// Wide -> long: one row per respondent x candidate; the candidate number
	// is stored in the string variable cand
	reshape long `stubs', i(r_*) j(cand) string

	// The stacked variables keep the stub's trailing underscore; drop that last character
	foreach stacked of varlist cattr_* out_* {
		local trimmed = substr("`stacked'", 1, strlen("`stacked'") - 1)
		rename `stacked' `trimmed'
	}
	
	
// Clean Candidate Profile Attributes

	// Age arrives as text; convert to a number
	gen cand_age = real(cattr_age)
	label var cand_age "Candidate Age"

	// Race indicators from the cattr_race text
	gen cand_black = (cattr_race == "Black")
	gen cand_white = (cattr_race == "White")
	gen cand_asian = (cattr_race == "Asian")
	gen cand_hispa = (cattr_race == "Hispanic")
	label var cand_black "Black"
	label var cand_white "White"
	label var cand_asian "Asian"
	label var cand_hispa "Hispanic"

	// Sex indicators from the cattr_sex text
	gen cand_female = (cattr_sex == "woman")
	gen cand_male = (cattr_sex == "man")
	label var cand_female "Female"
	label var cand_male "Male"

	// Experience: 1 when the key phrase occurs anywhere in cattr_exp
	gen cand_exp_teach = strpos(cattr_exp, "high school teacher") > 0
	gen cand_exp_council = strpos(cattr_exp, "city councilor") > 0
	gen cand_exp_lawyer = strpos(cattr_exp, "local attorney") > 0
	gen cand_exp_business = strpos(cattr_exp, "business owner") > 0
	gen cand_exp_newcomer = strpos(cattr_exp, "political newcomer") > 0
	label var cand_exp_teach "High school teacher"
	label var cand_exp_council "City councilor"
	label var cand_exp_lawyer "Local attorney"
	label var cand_exp_business "Local business owner"
	label var cand_exp_newcomer "Political newcomer"

	// Biden vote share shown in the profile: one indicator per odd value from 51 to 59
	forvalues share = 51(2)59 {
		gen cand_biden_p`share' = (cattr_biden == "`share'")
		label var cand_biden_p`share' "Vote Share: `share'%"
	}

	// Candidate district: the seven district profiles are told apart by the
	// value of cattr_white alone; the labels list each profile's full set of numbers
	local d = 0
	foreach pct in 23 21 28 53 55 59 63 {
		local ++d
		gen cand_dist`d' = (cattr_white == "`pct'")
	}
	label var cand_dist1 "[23, 20, 21, 31, 5]"
	label var cand_dist2 "[21, 16, 51, 8, 4]"
	label var cand_dist3 "[28, 53, 9, 6, 4]"
	label var cand_dist4 "[53, 23, 12, 7, 5]"
	label var cand_dist5 "[55, 10, 23, 8, 4]"
	label var cand_dist6 "[59, 16, 14, 7, 4]"
	label var cand_dist7 "[63, 8, 13, 11, 5]"

	/* 
		Use key phrases to determine issue area.
		strpos return 0 if text is not found, so if !=0, then phrase is in variable
	*/
	// Policy-position flags, all initialised to 0 and switched on below
	local policy_flags cand_policy_abort1 cand_policy_abort2 cand_policy_tax1 cand_policy_tax2 ///
		cand_policy_health1 cand_policy_health2 cand_policy_eco1 cand_policy_eco2 ///
		cand_policy_aa1 cand_policy_aa2 cand_policy_aa3
	foreach flag of local policy_flags {
		gen `flag' = 0
	}

	// Scan the three policy slots (cattr_policya/b/c); a flag becomes 1 when its
	// key phrase is found in any of them (strpos() is 0 when the phrase is absent)
	foreach slot in a b c {
		local txt cattr_policy`slot'

		// Abortion
		replace cand_policy_abort1 = 1 if strpos(`txt', "including the third trimester") > 0
		replace cand_policy_abort2 = 1 if strpos(`txt', "first trimester") > 0

		// Taxes
		replace cand_policy_tax1 = 1 if strpos(`txt', "Increase the tax rate") > 0
		replace cand_policy_tax2 = 1 if strpos(`txt', "Maintain the current tax rate") > 0

		// Health care
		replace cand_policy_health1 = 1 if strpos(`txt', "Replace private health") > 0
		replace cand_policy_health2 = 1 if strpos(`txt', "Maintain current Obamacare") > 0

		// Energy
		replace cand_policy_eco1 = 1 if strpos(`txt', "Expand investment in renewable") > 0
		replace cand_policy_eco2 = 1 if strpos(`txt', "Maintain current patterns") > 0

		// Affirmative action
		replace cand_policy_aa1 = 1 if strpos(`txt', "Expand affirmative action") > 0
		replace cand_policy_aa2 = 1 if strpos(`txt', "Maintain existing affirmative action") > 0
		replace cand_policy_aa3 = 1 if strpos(`txt', "End affirmative action") > 0
	}

	// NOTE(review): abort2 is matched on "first trimester" but labelled "up to 2nd tri." - confirm wording
	label var cand_policy_abort1 "Allow abortion, any time"
	label var cand_policy_abort2 "Allow abortion up to 2nd tri."
	label var cand_policy_tax1 "Increase tax rate on rich"
	label var cand_policy_tax2 "Maintain tax rate on rich"
	label var cand_policy_health1 "Expand health coverage"
	label var cand_policy_health2 "Maintain health coverage"
	label var cand_policy_eco1 "Expand investment in energy"
	label var cand_policy_eco2 "Maintain investment in energy"
	label var cand_policy_aa1 "Expand affirmative action"
	label var cand_policy_aa2 "Keep affirmative action as is"
	label var cand_policy_aa3 "End affirmative action"

	// Eyeball check: count the flags set per issue area in each row
	// (the codebook output should show no value above 1), then discard the counters
	egen temp_tax = rowtotal(cand_policy_tax*)
	egen temp_abort = rowtotal(cand_policy_abort*)
	egen temp_health = rowtotal(cand_policy_health*)
	egen temp_eco = rowtotal(cand_policy_eco*)
	egen temp_aa = rowtotal(cand_policy_aa*)
	codebook temp_*
	drop temp_*

	// 1 when any of the three affirmative-action flags is set
	gen has_racepolicy = inlist(1, cand_policy_aa1, cand_policy_aa2, cand_policy_aa3)
	label var has_racepolicy "Candidate profile has a racial policy"

	// Write the cleaned study 2 file (one row per respondent x candidate)
	save data_study_2.dta, replace

